In [1]:
# Parameters
# Free-form status message for this run. The value mentions Airflow, so it is
# presumably injected by the scheduler at execution time — TODO confirm.
# No cell visible in this notebook reads it.
msgs = "Ran from Airflow 2021-10-09!"
In [2]:
import pandas as pd
import plotly.express as px
from datetime import datetime, timedelta
In [3]:
# Remote semicolon-delimited CSV hosted on data.gouv.fr; fetched on every run.
DATASET_URL = 'https://www.data.gouv.fr/fr/datasets/r/fa4ad329-14ec-4394-85a4-c5df33769dff'
df = pd.read_csv(DATASET_URL, sep=';')
In [4]:
# Preview the first rows to check that the CSV parsed into the expected columns.
df.head()
Out[4]:
fra jour n_dose1 n_complet n_cum_dose1 n_cum_complet couv_dose1 couv_complet
0 FR 2020-12-27 401 51 401 51 0.0 0.0
1 FR 2020-12-28 422 38 823 89 0.0 0.0
2 FR 2020-12-29 270 17 1093 106 0.0 0.0
3 FR 2020-12-30 325 15 1418 121 0.0 0.0
4 FR 2020-12-31 255 19 1673 140 0.0 0.0
In [5]:
# Put the rows in chronological order and renumber the index from 0.
df = df.sort_values(by='jour').reset_index(drop=True)


def _one_week_earlier(day):
    """Return the date 7 days before ``day``; both are ``YYYY-MM-DD`` strings."""
    shifted = datetime.strptime(day, "%Y-%m-%d") - timedelta(days=7)
    return shifted.strftime("%Y-%m-%d")


# For every row, remember which day to compare against one week back.
df['date_7days_ago'] = df['jour'].map(_one_week_earlier)
In [6]:
def get_rolling_average(date, df, column):
    """Return the rounded mean of ``column`` over the 7 days ending at ``date``.

    The window is inclusive on both ends: it runs from six days before
    ``date`` up to ``date`` itself. Rows are selected by comparing the
    ``jour`` column against ``YYYY-MM-DD`` strings, which works because that
    format sorts lexicographically in chronological order.

    Args:
        date: Last day of the window, formatted as ``YYYY-MM-DD``.
        df: DataFrame with a ``jour`` column of ``YYYY-MM-DD`` strings and
            the numeric column named by ``column``.
        column: Name of the column to average.

    Returns:
        The window mean rounded to a whole number, as a float. NaN when no
        row falls inside the window.

    Raises:
        ValueError: If ``date`` does not match ``YYYY-MM-DD``.
        KeyError: If ``df`` lacks ``jour`` or ``column``.
    """
    window_start = (
        datetime.strptime(date, "%Y-%m-%d") - timedelta(days=6)
    ).strftime("%Y-%m-%d")
    in_window = (df['jour'] >= window_start) & (df['jour'] <= date)
    # Average only the requested column. Averaging the whole frame first
    # (``df[...].mean()[column]``) also tries to reduce the string columns,
    # which raises TypeError on pandas >= 2.0.
    window_mean = df.loc[in_window, column].mean()
    # round(..., 0) keeps a float, rounds halves to even (same as the NumPy
    # rounding used previously) and lets NaN pass through unchanged.
    return round(float(window_mean), 0)
In [7]:
# 7-day rolling mean of `n_complet`, one value per row.
df['mean'] = df['jour'].apply(lambda x: get_rolling_average(x, df, 'n_complet'))

# Fetch the rolling mean of the day one week earlier with a single keyed
# lookup instead of filtering the whole frame twice per row. Keeping the first
# row per day mirrors "take the first match"; days without a match become NaN,
# so the column stays numeric even when nothing matches.
mean_by_day = df.drop_duplicates(subset='jour').set_index('jour')['mean']
df['mean_7days_ago'] = df['date_7days_ago'].map(mean_by_day)

# Week-over-week change of the rolling mean: absolute, then in percent.
df['evol_mean'] = df['mean'] - df['mean_7days_ago']
df['evol_mean_percentage'] = df['evol_mean'] / df['mean_7days_ago'] * 100
In [8]:
# Inspect the first 20 rows of the enriched frame. Rows whose `date_7days_ago`
# has no matching `jour` show NaN in the week-over-week columns.
df.head(20)
Out[8]:
fra jour n_dose1 n_complet n_cum_dose1 n_cum_complet couv_dose1 couv_complet date_7days_ago mean mean_7days_ago evol_mean evol_mean_percentage
0 FR 2020-12-27 401 51 401 51 0.0 0.0 2020-12-20 51.0 NaN NaN NaN
1 FR 2020-12-28 422 38 823 89 0.0 0.0 2020-12-21 44.0 NaN NaN NaN
2 FR 2020-12-29 270 17 1093 106 0.0 0.0 2020-12-22 35.0 NaN NaN NaN
3 FR 2020-12-30 325 15 1418 121 0.0 0.0 2020-12-23 30.0 NaN NaN NaN
4 FR 2020-12-31 255 19 1673 140 0.0 0.0 2020-12-24 28.0 NaN NaN NaN
5 FR 2021-01-01 300 35 1973 175 0.0 0.0 2020-12-25 29.0 NaN NaN NaN
6 FR 2021-01-02 244 29 2217 204 0.0 0.0 2020-12-26 29.0 NaN NaN NaN
7 FR 2021-01-03 276 28 2493 232 0.0 0.0 2020-12-27 26.0 51.0 -25.0 -49.019608
8 FR 2021-01-04 1772 46 4265 278 0.0 0.0 2020-12-28 27.0 44.0 -17.0 -38.636364
9 FR 2021-01-05 5715 68 9980 346 0.0 0.0 2020-12-29 34.0 35.0 -1.0 -2.857143
10 FR 2021-01-06 11622 102 21602 448 0.0 0.0 2020-12-30 47.0 30.0 17.0 56.666667
11 FR 2021-01-07 29081 142 50683 590 0.1 0.0 2020-12-31 64.0 28.0 36.0 128.571429
12 FR 2021-01-08 36740 192 87423 782 0.2 0.0 2021-01-01 87.0 29.0 58.0 200.000000
13 FR 2021-01-09 11079 65 98502 847 0.2 0.0 2021-01-02 92.0 29.0 63.0 217.241379
14 FR 2021-01-10 5770 23 104272 870 0.2 0.0 2021-01-03 91.0 26.0 65.0 250.000000
15 FR 2021-01-11 39638 216 143910 1086 0.2 0.0 2021-01-04 115.0 27.0 88.0 325.925926
16 FR 2021-01-12 61674 312 205584 1398 0.4 0.0 2021-01-05 150.0 34.0 116.0 341.176471
17 FR 2021-01-13 64636 414 270220 1812 0.5 0.0 2021-01-06 195.0 47.0 148.0 314.893617
18 FR 2021-01-14 82045 1190 352265 3002 0.6 0.0 2021-01-07 345.0 64.0 281.0 439.062500
19 FR 2021-01-15 68524 576 420789 3578 0.7 0.0 2021-01-08 399.0 87.0 312.0 358.620690
In [9]:
# Line chart of the rolling mean over time. Title and axis labels are set
# explicitly so the figure is readable on its own instead of showing raw
# column names.
fig = px.line(
    df,
    x="jour",
    y="mean",
    hover_name="n_complet",
    line_shape="spline",
    render_mode="svg",
    title="7-day rolling mean of n_complet",
    labels={"jour": "Date", "mean": "Rolling mean (7 days)"},
)
fig.show()
In [10]:
# Save a static PNG copy of the figure at a fixed location under /tmp.
PNG_OUTPUT_PATH = "/tmp/covid.png"
fig.write_image(PNG_OUTPUT_PATH)
In [ ]: